<!DOCTYPE html>
<html lang="zh-CN">
<head>
  <meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1, maximum-scale=2">
<meta name="theme-color" content="#222">
<meta name="generator" content="Hexo 7.2.0">
  <link rel="apple-touch-icon" sizes="180x180" href="/images/ME.ico">
  <link rel="icon" type="image/png" sizes="32x32" href="/images/ME.ico">
  <link rel="icon" type="image/png" sizes="16x16" href="/images/ME.ico">
  <link rel="mask-icon" href="/images/ME.ico" color="#222">

<link rel="stylesheet" href="/css/main.css">


<link rel="stylesheet" href="/lib/font-awesome/css/all.min.css">

<script id="hexo-configurations">
    var NexT = window.NexT || {};
    var CONFIG = {"hostname":"www.zhoubencheng.com","root":"/","scheme":"Pisces","version":"7.8.0","exturl":false,"sidebar":{"position":"left","display":"post","padding":18,"offset":12,"onmobile":false},"copycode":{"enable":false,"show_result":true,"style":"mac"},"back2top":{"enable":true,"sidebar":false,"scrollpercent":true},"bookmark":{"enable":false,"color":"#222","save":"auto"},"fancybox":false,"mediumzoom":false,"lazyload":false,"pangu":false,"comments":{"style":"tabs","active":null,"storage":true,"lazyload":false,"nav":null},"algolia":{"hits":{"per_page":10},"labels":{"input_placeholder":"Search for Posts","hits_empty":"We didn't find any results for the search: ${query}","hits_stats":"${hits} results found in ${time} ms"}},"localsearch":{"enable":false,"trigger":"auto","top_n_per_article":1,"unescape":false,"preload":false},"motion":{"enable":true,"async":false,"transition":{"post_block":"fadeIn","post_header":"slideDownIn","post_body":"slideDownIn","coll_header":"slideLeftIn","sidebar":"slideUpIn"}}};
  </script>

  <meta name="description" content="在大模型推理中，我们通常可以降低模型权重和激活值的位宽来降低模型的存储空间、提高推理效率，而这样的操作也称为量化。通过量化将浮点模型(float32)转化为低比特的定点模型(int8)，使得模型运行的存储压力和计算复杂性都有所降低，从而使模型从GPU端移植到板载系统中运行成为可能">
<meta property="og:type" content="article">
<meta property="og:title" content="量化和反量化-pytorch">
<meta property="og:url" content="http://www.zhoubencheng.com/2024/08/21/%E9%87%8F%E5%8C%96%E5%92%8C%E5%8F%8D%E9%87%8F%E5%8C%96-pytorch/index.html">
<meta property="og:site_name" content="Benson&#39;s blog">
<meta property="og:description" content="在大模型推理中，我们通常可以降低模型权重和激活值的位宽来降低模型的存储空间、提高推理效率，而这样的操作也称为量化。通过量化将浮点模型(float32)转化为低比特的定点模型(int8)，使得模型运行的存储压力和计算复杂性都有所降低，从而使模型从GPU端移植到板载系统中运行成为可能">
<meta property="og:locale" content="zh_CN">
<meta property="og:image" content="https://pic1.zhimg.com/70/v2-5b8e340810bf489ea58b7dd8366fbea6_1440w.avis?source=172ae18b&amp;biz_tag=Post">
<meta property="article:published_time" content="2024-08-21T14:51:44.000Z">
<meta property="article:modified_time" content="2024-08-23T07:51:53.984Z">
<meta property="article:author" content="Benson">
<meta property="article:tag" content="pytorch">
<meta property="article:tag" content="机器学习">
<meta property="article:tag" content="概率论">
<meta name="twitter:card" content="summary">
<meta name="twitter:image" content="https://pic1.zhimg.com/70/v2-5b8e340810bf489ea58b7dd8366fbea6_1440w.avis?source=172ae18b&amp;biz_tag=Post">

<link rel="canonical" href="http://www.zhoubencheng.com/2024/08/21/%E9%87%8F%E5%8C%96%E5%92%8C%E5%8F%8D%E9%87%8F%E5%8C%96-pytorch/">


<script id="page-configurations">
  // https://hexo.io/docs/variables.html
  CONFIG.page = {
    sidebar: "",
    isHome : false,
    isPost : true,
    lang   : 'zh-CN'
  };
</script>

  <title>量化和反量化-pytorch | Benson's blog</title>
  






  <noscript>
  <style>
  .use-motion .brand,
  .use-motion .menu-item,
  .sidebar-inner,
  .use-motion .post-block,
  .use-motion .pagination,
  .use-motion .comments,
  .use-motion .post-header,
  .use-motion .post-body,
  .use-motion .collection-header { opacity: initial; }

  .use-motion .site-title,
  .use-motion .site-subtitle {
    opacity: initial;
    top: initial;
  }

  .use-motion .logo-line-before i { left: initial; }
  .use-motion .logo-line-after i { right: initial; }
  </style>
</noscript>


<style>.github-emoji { position: relative; display: inline-block; width: 1.2em; min-height: 1.2em; overflow: hidden; vertical-align: top; color: transparent; }  .github-emoji > span { position: relative; z-index: 10; }  .github-emoji img, .github-emoji .fancybox { margin: 0 !important; padding: 0 !important; border: none !important; outline: none !important; text-decoration: none !important; user-select: none !important; cursor: auto !important; }  .github-emoji img { height: 1.2em !important; width: 1.2em !important; position: absolute !important; left: 50% !important; top: 50% !important; transform: translate(-50%, -50%) !important; user-select: none !important; cursor: auto !important; } .github-emoji-fallback { color: inherit; } .github-emoji-fallback img { opacity: 0 !important; }</style>
</head>

<body itemscope itemtype="http://schema.org/WebPage">
  <div class="container use-motion">
    <div class="headband"></div>

    <header class="header" itemscope itemtype="http://schema.org/WPHeader">
      <div class="header-inner"><div class="site-brand-container">
  <div class="site-nav-toggle">
    <div class="toggle" aria-label="切换导航栏">
      <span class="toggle-line toggle-line-first"></span>
      <span class="toggle-line toggle-line-middle"></span>
      <span class="toggle-line toggle-line-last"></span>
    </div>
  </div>

  <div class="site-meta">

    <a href="/" class="brand" rel="start">
      <span class="logo-line-before"><i></i></span>
      <h1 class="site-title">Benson's blog</h1>
      <span class="logo-line-after"><i></i></span>
    </a>
      <p class="site-subtitle" itemprop="description">最好的安排</p>
  </div>

  <div class="site-nav-right">
    <div class="toggle popup-trigger">
    </div>
  </div>
</div>




<nav class="site-nav">
  <ul id="menu" class="main-menu menu">
        <li class="menu-item menu-item-home">

    <a href="/" rel="section"><i class="fa fa-home fa-fw"></i>首页</a>

  </li>
        <li class="menu-item menu-item-tags">

    <a href="/tags/" rel="section"><i class="fa fa-tags fa-fw"></i>标签</a>

  </li>
        <li class="menu-item menu-item-categories">

    <a href="/categories/" rel="section"><i class="fa fa-th fa-fw"></i>分类</a>

  </li>
        <li class="menu-item menu-item-archives">

    <a href="/archives/" rel="section"><i class="fa fa-archive fa-fw"></i>归档</a>

  </li>
  </ul>
</nav>




</div>
    </header>

    
  <div class="back-to-top">
    <i class="fa fa-arrow-up"></i>
    <span>0%</span>
  </div>

  <a href="https://github.com/ZhouBencheng" class="github-corner" title="Follow me on GitHub" aria-label="Follow me on GitHub" rel="noopener" target="_blank"><svg width="80" height="80" viewBox="0 0 250 250" aria-hidden="true"><path d="M0,0 L115,115 L130,115 L142,142 L250,250 L250,0 Z"></path><path d="M128.3,109.0 C113.8,99.7 119.0,89.6 119.0,89.6 C122.0,82.7 120.5,78.6 120.5,78.6 C119.2,72.0 123.4,76.3 123.4,76.3 C127.3,80.9 125.5,87.3 125.5,87.3 C122.9,97.6 130.6,101.9 134.4,103.2" fill="currentColor" style="transform-origin: 130px 106px;" class="octo-arm"></path><path d="M115.0,115.0 C114.9,115.1 118.7,116.5 119.8,115.4 L133.7,101.6 C136.9,99.2 139.9,98.4 142.2,98.6 C133.8,88.0 127.5,74.4 143.8,58.0 C148.5,53.4 154.0,51.2 159.7,51.0 C160.3,49.4 163.2,43.6 171.4,40.1 C171.4,40.1 176.1,42.5 178.8,56.2 C183.1,58.6 187.2,61.8 190.9,65.4 C194.5,69.0 197.7,73.2 200.1,77.6 C213.8,80.2 216.3,84.9 216.3,84.9 C212.7,93.1 206.9,96.0 205.4,96.6 C205.1,102.4 203.0,107.8 198.3,112.5 C181.9,128.9 168.3,122.5 157.7,114.1 C157.9,116.9 156.7,120.9 152.7,124.9 L141.0,136.5 C139.8,137.7 141.6,141.9 141.8,141.8 Z" fill="currentColor" class="octo-body"></path></svg></a>


    <main class="main">
      <div class="main-inner">
        <div class="content-wrap">
          

          <div class="content post posts-expand">
            

    
  
  
  <article itemscope itemtype="http://schema.org/Article" class="post-block" lang="zh-CN">
    <link itemprop="mainEntityOfPage" href="http://www.zhoubencheng.com/2024/08/21/%E9%87%8F%E5%8C%96%E5%92%8C%E5%8F%8D%E9%87%8F%E5%8C%96-pytorch/">

    <span hidden itemprop="author" itemscope itemtype="http://schema.org/Person">
      <meta itemprop="image" content="/images/ME.ico">
      <meta itemprop="name" content="Benson">
      <meta itemprop="description" content="Share my thoughts with you">
    </span>

    <span hidden itemprop="publisher" itemscope itemtype="http://schema.org/Organization">
      <meta itemprop="name" content="Benson's blog">
    </span>
      <header class="post-header">
        <h1 class="post-title" itemprop="name headline">
          量化和反量化-pytorch
        </h1>

        <div class="post-meta">
            <span class="post-meta-item">
              <span class="post-meta-item-icon">
                <i class="far fa-calendar"></i>
              </span>
              <span class="post-meta-item-text">发表于</span>

              <time title="创建时间：2024-08-21 22:51:44" itemprop="dateCreated datePublished" datetime="2024-08-21T22:51:44+08:00">2024-08-21</time>
            </span>
              <span class="post-meta-item">
                <span class="post-meta-item-icon">
                  <i class="far fa-calendar-check"></i>
                </span>
                <span class="post-meta-item-text">更新于</span>
                <time title="修改时间：2024-08-23 15:51:53" itemprop="dateModified" datetime="2024-08-23T15:51:53+08:00">2024-08-23</time>
              </span>
            <span class="post-meta-item">
              <span class="post-meta-item-icon">
                <i class="far fa-folder"></i>
              </span>
              <span class="post-meta-item-text">分类于</span>
                <span itemprop="about" itemscope itemtype="http://schema.org/Thing">
                  <a href="/categories/%E6%9C%BA%E5%99%A8%E5%AD%A6%E4%B9%A0/" itemprop="url" rel="index"><span itemprop="name">机器学习</span></a>
                </span>
            </span>

          
            <span class="post-meta-item" title="阅读次数" id="busuanzi_container_page_pv" style="display: none;">
              <span class="post-meta-item-icon">
                <i class="fa fa-eye"></i>
              </span>
              <span class="post-meta-item-text">阅读次数：</span>
              <span id="busuanzi_value_page_pv"></span>
            </span>

        </div>
      </header>

    
    
    
    <div class="post-body" itemprop="articleBody">

      
        <p>在大模型推理中，我们通常可以降低<strong>模型权重</strong>和<strong>激活值</strong>的位宽来降低模型的存储空间、提高推理效率，而这样的操作也称为<strong>量化</strong>。通过量化将浮点模型<code>(float32)</code>转化为低比特的定点模型<code>(int8)</code>，使得模型运行的存储压力和计算复杂性都有所降低，从而使模型从GPU端移植到板载系统中运行成为可能</p>
<span id="more"></span>
<h2 id="int8量化">INT8量化</h2>
<p><img src="https://pic1.zhimg.com/70/v2-5b8e340810bf489ea58b7dd8366fbea6_1440w.avis?source=172ae18b&amp;biz_tag=Post"></p>
<h3 id="非饱和量化">非饱和量化</h3>
<p>非饱和量化是一种最简单粗暴的量化方法，其原理是根据<code>float32</code>数据中的<strong>最大绝对值</strong>计算出缩放因子<code>scale</code>，并使用线性映射的方法将<code>float32</code>中的值域转化到<code>int8</code>的值域(-127,
127)中 <span class="math display">\[
R = scale * (value + zero\_point)
\]</span>
非饱和量化的阈值选取方法能够将所有数据都映射到<code>INT8</code>数据域中，但非零点对称的分布会造成映射空间的浪费</p>
<h3 id="饱和量化">饱和量化</h3>
<p>饱和量化采取一定程度截断的方法，舍去出现频率低的<span class="math inline">\(|x|\)</span>最大值，是的输入数据大致能够零点对称，而关于饱和量化中的阈值选取，参考<a href="#KL算法">KL散度算法</a></p>
<blockquote>
<p>已知在量化的线性映射公式(1)中，当zero_point为0时为对称量化，当zero_point不为0时为非对称量化</p>
</blockquote>
<h2>
<span id="KL算法">KL散度</span>
</h2>
<p><a target="_blank" rel="noopener" href="https://zhuanlan.zhihu.com/p/37452654">概念参考链接</a> <a target="_blank" rel="noopener" href="https://hsinjhao.github.io/2019/05/22/KL-DivergenceIntroduction/">数学性质参考链接</a></p>
<p>KL散度全称为kullback-leible散度，该算法用于衡量两个分布之间的<strong>匹配程度</strong>
<span class="math display">\[
D_{KL}(p||q) = \sum_{i = 1}^{N}p(x_i)\log({p(x_i)\over q(x_i)})
\]</span> 其中<span class="math inline">\(p(x_i)\)</span>表示真实分布，<span class="math inline">\(q(x_i)\)</span>表示我们去近似<span class="math inline">\(p(x_i)\)</span>的分布。当KL散度计算值越接近0，两个分布的近似程度越高，反之差距越大。而KL散度具有<strong>正定性</strong>和<strong>非对称性</strong></p>

    </div>

    
    
    

      <footer class="post-footer">
          
          <div class="post-tags">
              <a href="/tags/pytorch/" rel="tag"><i class="fa fa-tag"></i> pytorch</a>
              <a href="/tags/%E6%9C%BA%E5%99%A8%E5%AD%A6%E4%B9%A0/" rel="tag"><i class="fa fa-tag"></i> 机器学习</a>
              <a href="/tags/%E6%A6%82%E7%8E%87%E8%AE%BA/" rel="tag"><i class="fa fa-tag"></i> 概率论</a>
          </div>

        


        
    <div class="post-nav">
      <div class="post-nav-item">
    <a href="/2024/08/16/cuda%E7%BC%96%E7%A8%8B/" rel="prev" title="cuda编程">
      <i class="fa fa-chevron-left"></i> cuda编程
    </a></div>
      <div class="post-nav-item">
    <a href="/2024/09/02/Github-Action/" rel="next" title="Github_Action">
      Github_Action <i class="fa fa-chevron-right"></i>
    </a></div>
    </div>
      </footer>
    
  </article>
  
  
  



          </div>
          

<script>
  window.addEventListener('tabs:register', () => {
    let { activeClass } = CONFIG.comments;
    if (CONFIG.comments.storage) {
      activeClass = localStorage.getItem('comments_active') || activeClass;
    }
    if (activeClass) {
      let activeTab = document.querySelector(`a[href="#comment-${activeClass}"]`);
      if (activeTab) {
        activeTab.click();
      }
    }
  });
  if (CONFIG.comments.storage) {
    window.addEventListener('tabs:click', event => {
      if (!event.target.matches('.tabs-comment .tab-content .tab-pane')) return;
      let commentClass = event.target.classList[1];
      localStorage.setItem('comments_active', commentClass);
    });
  }
</script>

        </div>
          
  
  <div class="toggle sidebar-toggle">
    <span class="toggle-line toggle-line-first"></span>
    <span class="toggle-line toggle-line-middle"></span>
    <span class="toggle-line toggle-line-last"></span>
  </div>

  <aside class="sidebar">
    <div class="sidebar-inner">

      <ul class="sidebar-nav motion-element">
        <li class="sidebar-nav-toc">
          文章目录
        </li>
        <li class="sidebar-nav-overview">
          站点概览
        </li>
      </ul>

      <!--noindex-->
      <div class="post-toc-wrap sidebar-panel">
          <div class="post-toc motion-element"><ol class="nav"><li class="nav-item nav-level-2"><a class="nav-link" href="#int8%E9%87%8F%E5%8C%96"><span class="nav-number">1.</span> <span class="nav-text">INT8量化</span></a><ol class="nav-child"><li class="nav-item nav-level-3"><a class="nav-link" href="#%E9%9D%9E%E9%A5%B1%E5%92%8C%E9%87%8F%E5%8C%96"><span class="nav-number">1.1.</span> <span class="nav-text">非饱和量化</span></a></li><li class="nav-item nav-level-3"><a class="nav-link" href="#%E9%A5%B1%E5%92%8C%E9%87%8F%E5%8C%96"><span class="nav-number">1.2.</span> <span class="nav-text">饱和量化</span></a></li></ol></li><li class="nav-item nav-level-2"><a class="nav-link"><span class="nav-number">2.</span> <span class="nav-text">
KL散度
</span></a></li></ol></div>
      </div>
      <!--/noindex-->

      <div class="site-overview-wrap sidebar-panel">
        <div class="site-author motion-element" itemprop="author" itemscope itemtype="http://schema.org/Person">
    <img class="site-author-image" itemprop="image" alt="Benson"
      src="/images/ME.ico">
  <p class="site-author-name" itemprop="name">Benson</p>
  <div class="site-description" itemprop="description">Share my thoughts with you</div>
</div>
<div class="site-state-wrap motion-element">
  <nav class="site-state">
      <div class="site-state-item site-state-posts">
          <a href="/archives/">
        
          <span class="site-state-item-count">17</span>
          <span class="site-state-item-name">日志</span>
        </a>
      </div>
      <div class="site-state-item site-state-categories">
            <a href="/categories/">
          
        <span class="site-state-item-count">7</span>
        <span class="site-state-item-name">分类</span></a>
      </div>
      <div class="site-state-item site-state-tags">
            <a href="/tags/">
          
        <span class="site-state-item-count">25</span>
        <span class="site-state-item-name">标签</span></a>
      </div>
  </nav>
</div>
  <div class="links-of-author motion-element">
      <span class="links-of-author-item">
        <a href="https://github.com/ZhouBencheng" title="GitHub → https:&#x2F;&#x2F;github.com&#x2F;ZhouBencheng" rel="noopener" target="_blank"><i class="fab fa-github fa-fw"></i>GitHub</a>
      </span>
      <span class="links-of-author-item">
        <a href="mailto:zhoubencheng2022@163.com" title="E-Mail → mailto:zhoubencheng2022@163.com" rel="noopener" target="_blank"><i class="fa fa-envelope fa-fw"></i>E-Mail</a>
      </span>
  </div>



      </div>

    </div>
  </aside>
  <div id="sidebar-dimmer"></div>


      </div>
    </main>

    <footer class="footer">
      <div class="footer-inner">
        

        

<div class="copyright">
  
  &copy; 
  <span itemprop="copyrightYear">2024</span>
  <span class="with-love">
    <i class="fa fa-heart"></i>
  </span>
  <span class="author" itemprop="copyrightHolder">Benson</span>
</div>

<!--
  <div class="powered-by">由 <a href="https://hexo.io/" class="theme-link" rel="noopener" target="_blank">Hexo</a> & <a href="https://pisces.theme-next.org/" class="theme-link" rel="noopener" target="_blank">NexT.Pisces</a> 强力驱动
  </div>
-->

        
<div class="busuanzi-count">
  <script async src="https://busuanzi.ibruce.info/busuanzi/2.3/busuanzi.pure.mini.js"></script>
    <span class="post-meta-item" id="busuanzi_container_site_uv" style="display: none;">
      <span class="post-meta-item-icon">
        <i class="fa fa-user"></i>
      </span>
      <span class="site-uv" title="总访客量">
        <span id="busuanzi_value_site_uv"></span>
      </span>
    </span>
    <span class="post-meta-divider">|</span>
    <span class="post-meta-item" id="busuanzi_container_site_pv" style="display: none;">
      <span class="post-meta-item-icon">
        <i class="fa fa-eye"></i>
      </span>
      <span class="site-pv" title="总访问量">
        <span id="busuanzi_value_site_pv"></span>
      </span>
    </span>
</div>








      </div>
    </footer>
  </div>

  
  <script src="/lib/anime.min.js"></script>
  <script src="/lib/velocity/velocity.min.js"></script>
  <script src="/lib/velocity/velocity.ui.min.js"></script>

<script src="/js/utils.js"></script>

<script src="/js/motion.js"></script>


<script src="/js/schemes/pisces.js"></script>


<script src="/js/next-boot.js"></script>


  <script defer src="/lib/three/three.min.js"></script>
    <script defer src="/lib/three/canvas_lines.min.js"></script>


  













<script>
if (document.querySelectorAll('pre.mermaid').length) {
  NexT.utils.getScript('//cdn.jsdelivr.net/npm/mermaid@8/dist/mermaid.min.js', () => {
    mermaid.initialize({
      theme    : 'dark',
      logLevel : 3,
      flowchart: { curve     : 'linear' },
      gantt    : { axisFormat: '%m/%d/%Y' },
      sequence : { actorMargin: 50 }
    });
  }, window.mermaid);
}
</script>


  

  
      

<script>
  if (typeof MathJax === 'undefined') {
    window.MathJax = {
      loader: {
        source: {
          '[tex]/amsCd': '[tex]/amscd',
          '[tex]/AMScd': '[tex]/amscd'
        }
      },
      tex: {
        inlineMath: {'[+]': [['$', '$']]},
        tags: 'ams'
      },
      options: {
        renderActions: {
          findScript: [10, doc => {
            document.querySelectorAll('script[type^="math/tex"]').forEach(node => {
              const display = !!node.type.match(/; *mode=display/);
              const math = new doc.options.MathItem(node.textContent, doc.inputJax[0], display);
              const text = document.createTextNode('');
              node.parentNode.replaceChild(text, node);
              math.start = {node: text, delim: '', n: 0};
              math.end = {node: text, delim: '', n: 0};
              doc.math.push(math);
            });
          }, '', false],
          insertedScript: [200, () => {
            document.querySelectorAll('mjx-container').forEach(node => {
              let target = node.parentNode;
              if (target.nodeName.toLowerCase() === 'li') {
                target.parentNode.classList.add('has-jax');
              }
            });
          }, '', false]
        }
      }
    };
    (function () {
      var script = document.createElement('script');
      script.src = '//cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js';
      script.defer = true;
      document.head.appendChild(script);
    })();
  } else {
    MathJax.startup.document.state(0);
    MathJax.texReset();
    MathJax.typeset();
  }
</script>

    

  

</body>
</html>
